###############################################################
###############################################################
#### Stefan Müller, Samuel Brazys, and Alexander Dukalskis
#### Replication Scripts for: 
#### Discourse Wars and 'Mask Diplomacy': China's Global Image Management in Times of Crisis 
#### Political Research Exchange, 2024
#### Link to paper: https://doi.org/10.1080/2474736X.2024.2309178
###############################################################
###############################################################

## Note: check the 000_README.pdf file on Harvard Dataverse for 
## the full replication instructions and information on all code scripts.
## Link to Dataverse repository: https://doi.org/10.7910/DVN/KRXMXJ
## Please contact the authors if you have any questions or suggestions. 
## Note: due to copyright restrictions some of the files cannot be shared publicly.
## However, we provide all replication scripts and intermediate objects to reproduce
## the plots and tables included in the paper and Supporting Information.

## This file estimates the difference-in-differences (DiD) models and plots the results.

# load packages
library(did)
library(dplyr)
library(readr)
library(ggplot2)
library(stringr)
library(forcats)

# load custom ggplot2 scheme
source("function_theme_base.R")


# list the contents of the folder holding the DiD input data
files <- list.files("data_did/")

# keep every entry except the final one of the listing
# NOTE(review): presumably the last entry is not an analysis dataset -- confirm
files_clean <- head(files, -1)
files_clean

# number of files kept for the analysis
length(files_clean)

head(files_clean)
files_clean

# Flatten an aggregated DiD result into a data frame.
#
# x: a list-like object providing the elements `type`, `overall.att`,
#    `overall.se`, `egt`, `att.egt`, `se.egt` and `DIDparams$n`
#    (in this script: the value returned by aggte()).
#
# Returns a data frame with the columns type, overall_att, overall_se,
# egt, egt_att, egt_se and n. Elements shorter than the longest one are
# recycled by data.frame().
get_att_dynamic <- function(x) {
    sample_size <- x$DIDparams$n

    estimates <- data.frame(type = x$type,
                            overall_att = x$overall.att,
                            overall_se = x$overall.se,
                            egt = x$egt,
                            egt_att = x$att.egt,
                            egt_se = x$se.egt,
                            n = sample_size)

    estimates
}



# symmetric event-study windows (in weeks around treatment), estimated
# in this order for every dataset
egt_windows <- c(5, 4, 3, 2)

# Estimate the DiD models for one dataset.
#
# file_name: name of a csv file inside "data_did/".
# windows:   numeric vector of window half-widths; for each value w the
#            group-time effects are aggregated dynamically over event
#            times -w to w (min_e, max_e and balance_e are all set from w).
#
# Returns a data frame that stacks the output of get_att_dynamic() for
# every window, with the added columns `egt_window` ("±w") and `data`
# (the file name).
#
# NOTE(review): aggte() is called with bstrap = TRUE and no seed is set
# anywhere in the visible script, so the standard errors may differ
# slightly between runs -- confirm whether a seed should be fixed.
estimate_did_file <- function(file_name, windows = egt_windows) {
    
    cat("Analyzing", file_name, "\n")
    
    panel <- read.csv(file.path("data_did", file_name),
                      header = TRUE)
    
    # disabled in the original script:
    # panel <- filter(panel, min_t != "99999")
    
    # group-time ATTs without covariates, using not-yet-treated units
    # as the comparison group
    mw_attgt <- att_gt(yname = "std_mean_score",
                       gname = "min_t",
                       idname = "ccode_num",
                       tname = "week2",
                       xformla = ~1,
                       bstrap = FALSE,
                       data = panel,
                       control_group = "notyettreated",
                       allow_unbalanced_panel = TRUE)
    
    # aggregate to dynamic effects for each window and convert all
    # estimates (weekly and overall) to a data frame
    per_window <- lapply(windows, function(w) {
        mw_dyn <- aggte(mw_attgt,
                        type = "dynamic",
                        min_e = -w,
                        max_e = w,
                        bstrap = TRUE,
                        balance_e = w,
                        na.rm = TRUE)
        
        get_att_dynamic(mw_dyn) %>% 
            mutate(egt_window = paste0("±", w))
    })
    
    dat_atts_dyn <- bind_rows(per_window)
    
    # store information on dataset
    dat_atts_dyn$data <- file_name
    
    dat_atts_dyn
}

length(files_clean)

# run the analysis for every dataset; results are collected in a list
# and bound once instead of growing a data frame inside a loop
dat_atts_list <- lapply(files_clean, estimate_did_file)

# the original loop prepended each new result, so the list is reversed
# to keep the same row order in the stored estimates
dat_atts_all <- bind_rows(rev(dat_atts_list))

# write the estimates to disk and read them back in, so that everything
# below works from the stored csv file
write_csv(dat_atts_all, "data_figs_did.csv")

dat_atts_all <- read_csv("data_figs_did.csv")

# derive plot labels from the name of the dataset file;
# file names matching none of the model patterns get model_clean = NA
dat_atts_all_clean <- dat_atts_all %>% 
    mutate(
        model_clean = case_when(
            str_detect(data, "noRMC") ~ "Substantive Events",
            str_detect(data, "r_ENGLISH") ~ "Full Sample",
            str_detect(data, "r_FULL") ~ "Full Sample",
            str_detect(data, "nomask") ~ "No Mask Diplomacy Content"
        ),
        mask_events = ifelse(str_detect(data, "_nrmc"),
                             "Substantive Support",
                             "All Types of Support"),
        language = ifelse(str_detect(data, "ENGLISH"),
                          "English",
                          "English and Translated")
    )


# add 90% and 95% confidence bounds for the overall ATT
# (estimate -/+ 1.645 or 1.96 standard errors)
dat_overall_plot <- mutate(
    dat_atts_all_clean,
    overall_ci_lower90 = overall_att - 1.645 * overall_se,
    overall_ci_upper90 = overall_att + 1.645 * overall_se,
    overall_ci_lower95 = overall_att - 1.96 * overall_se,
    overall_ci_upper95 = overall_att + 1.96 * overall_se
)

table(dat_overall_plot$n)


# the overall estimate is repeated on every event-time row of a model;
# restricting to the overall/metadata columns and dropping duplicate
# rows leaves one row per estimated model. Rows without a model label
# are removed.
dat_overall_plot <- dat_overall_plot %>% 
    dplyr::select(egt_window,
                  starts_with("overall"),
                  mask_events,
                  language,
                  data,
                  n,
                  model_clean) %>% 
    unique() %>% 
    filter(!is.na(model_clean))

# Figure 05
# overall ATT per event window for the English-language datasets, one
# panel per model; thick bars show the 90% and thin bars the 95% interval
dat_overall_plot %>% 
    filter(language == "English") %>% 
    ggplot(aes(x = forcats::fct_rev(egt_window), y = overall_att)) +
    geom_text(aes(label = round(overall_att, 2)), nudge_x = 0.3, size = 4.5) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
    geom_point(size = 3, position = position_dodge(width = 0.5)) + 
    geom_linerange(aes(ymin = overall_ci_lower90, ymax = overall_ci_upper90),
                   size = 1.4,
                   position = position_dodge(width = 0.5)) +
    geom_linerange(aes(ymin = overall_ci_lower95, ymax = overall_ci_upper95),
                   size = 0.8,
                   position = position_dodge(width = 0.5)) +
    facet_wrap(~model_clean) + 
    coord_flip() +
    scale_y_continuous(limits = c(-0.1, 1)) +
    labs(x = "Weeks Around Treatment",
         y = "Estimated ATT on China Media Tone in Standard Deviations") +
    theme(legend.position = "none", legend.title = element_blank())

# save the most recent plot in all three formats
ggsave("fig_05.pdf", width = 9.5, height = 4)
ggsave("fig_05.png", dpi = 300, width = 9.5, height = 4)
ggsave("fig_05.eps", device = "eps", width = 9.5, height = 4)

# distinguish between democracies and non-democracies
#
# Keeps the estimates from dataset files whose name contains "edem_"
# (this also matches the "noedem_" files) and the literal text "_E.csv",
# adds 90% and 95% confidence bounds for the overall ATT, and derives
# the two facet labels from the file name. The existing `type` column
# is overwritten with the sample label.
dat_dem <- dat_atts_all_clean |>
    filter(str_detect(data, "edem_")) |> # keep democracy splits
    # fixed() matches the text literally; in a regular expression the
    # "." of "_E.csv" would match any character
    filter(str_detect(data, fixed("_E.csv"))) |> 
    mutate(overall_ci_lower90 = overall_att - 1.645 * overall_se,
           overall_ci_upper90 = overall_att + 1.645 * overall_se,
           overall_ci_lower95 = overall_att - 1.96 * overall_se,
           overall_ci_upper95 = overall_att + 1.96 * overall_se) |> 
    # NOTE(review): files matching "_nrmc" are labelled
    # "Substantive Support" when `mask_events` is created earlier in this
    # script -- confirm that "No Mask Diplomacy Content" is the intended
    # label for "nrmc" files here
    mutate(type = ifelse(str_detect(data, "nrmc"),
                         "No Mask Diplomacy Content",
                         "Full Sample")) |> 
    mutate(dem_non_dem = ifelse(str_detect(data, "noedem"),
                                "Electoral Democracy Score:\nBelow Median",
                                "Electoral Democracy Score:\nAbove Median"))

# Figure 06
# overall ATT per event window, split by sample (rows) and by electoral
# democracy score (columns); thick bars show the 90% and thin bars the
# 95% interval
dat_dem %>% 
    ggplot(aes(x = forcats::fct_rev(egt_window), y = overall_att)) +
    geom_text(aes(label = round(overall_att, 2)), nudge_x = 0.3, size = 4.5) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
    geom_point(size = 3, position = position_dodge(width = 0.5)) + 
    geom_linerange(aes(ymin = overall_ci_lower90, ymax = overall_ci_upper90),
                   size = 1.4,
                   position = position_dodge(width = 0.5)) +
    geom_linerange(aes(ymin = overall_ci_lower95, ymax = overall_ci_upper95),
                   size = 0.8,
                   position = position_dodge(width = 0.5)) +
    facet_grid(type~dem_non_dem) +
    coord_flip() +
    labs(x = "Weeks Around Treatment",
         y = "Estimated ATT on China Media Tone in Standard Deviations") +
    theme(legend.position = "none", legend.title = element_blank())

# save the most recent plot in all three formats
ggsave("fig_06.pdf", width = 9.5, height = 7)
ggsave("fig_06.png", dpi = 300, width = 9.5, height = 7)
ggsave("fig_06.eps", device = "eps", width = 9.5, height = 7)






## plot dynamic weekly scores

# weekly (event-time) estimates of the ±5 window with 90% and 95%
# confidence bounds; `treated` marks event times >= 0 as post-treatment
# and is stored as a factor with reversed level order
dat_atts_all_plot <- dat_atts_all_clean %>% 
    filter(egt_window == "±5") %>% 
    mutate(
        egt_ci_lower90 = egt_att - 1.645 * egt_se,
        egt_ci_upper90 = egt_att + 1.645 * egt_se,
        egt_ci_lower95 = egt_att - 1.96 * egt_se,
        egt_ci_upper95 = egt_att + 1.96 * egt_se,
        treated = forcats::fct_rev(
            ifelse(egt >= 0, "Post-Treatment", "Pre-Treatment")
        )
    ) %>% 
    # panel label: the model label with "(English)" and "Sample: " removed,
    # and a remaining "Full " mapped back to "Full Sample"
    mutate(
        model_clean_label = str_remove_all(model_clean, "\\(English\\)"),
        model_clean_label = str_remove_all(model_clean_label, "Sample\\: "),
        model_clean_label = dplyr::recode(model_clean_label,
                                          "Full " = "Full Sample")
    )

table(dat_atts_all_plot$model_clean_label)

# Figure A04
# weekly ATT estimates relative to treatment for the English-language
# datasets, one panel per model; colour and shape separate pre- and
# post-treatment weeks, thick bars show the 90% and thin bars the 95%
# interval
dat_atts_all_plot %>% 
    filter(language == "English") %>% 
    ggplot(aes(x = factor(egt),
               y = egt_att,
               colour = treated,
               shape = treated)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point(size = 3.5) + 
    geom_linerange(aes(ymin = egt_ci_lower90, ymax = egt_ci_upper90),
                   size = 1.4) +
    geom_linerange(aes(ymin = egt_ci_lower95, ymax = egt_ci_upper95),
                   size = 0.8) +
    facet_wrap(~model_clean_label, nrow = 1) +
    scale_colour_manual(values = c("darkred", "darkgreen")) +
    scale_shape_manual(values = c(1, 16)) +
    labs(x = "Week Relative to Mask Diplomacy Treatment",
         y = "ATT on China Media Tone") +
    theme(legend.position = "bottom", legend.title = element_blank())

# save the most recent plot
ggsave("fig_a04.pdf", width = 9.5, height = 5)
ggsave("fig_a04.png", dpi = 300, width = 9.5, height = 5)


# English and English + translated samples
# Figure A05
# overall ATT per event window for all datasets, with language in rows
# and model in columns; thick bars show the 90% and thin bars the 95%
# interval
dat_overall_plot %>% 
    ggplot(aes(x = forcats::fct_rev(egt_window), y = overall_att)) +
    geom_text(aes(label = round(overall_att, 2)), nudge_x = 0.3, size = 4.5) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
    geom_point(size = 3, position = position_dodge(width = 0.5)) + 
    geom_linerange(aes(ymin = overall_ci_lower90, ymax = overall_ci_upper90),
                   size = 1.4,
                   position = position_dodge(width = 0.5)) +
    geom_linerange(aes(ymin = overall_ci_lower95, ymax = overall_ci_upper95),
                   size = 0.8,
                   position = position_dodge(width = 0.5)) +
    facet_grid(language~model_clean) + 
    coord_flip() +
    labs(x = "Weeks Around Treatment",
         y = "Estimated ATT on China Media Tone in Standard Deviations") +
    theme(legend.position = "none", legend.title = element_blank())

# save the most recent plot
ggsave("fig_a05.pdf", width = 9.5, height = 7)
ggsave("fig_a05.png", dpi = 300, width = 9.5, height = 7)

